/*
Input: KommW2014_Strassenverzeichnis.xlsx
	> electoral roll 2014: raw addresses of all eligible voters and their assigned precinct


Output: tmp/strassen_kow14_voll_buchstabe.dta

Main task:
	> import, clean, and identify street segments that are assigned to a given precinct 
			-> a segment is equal to a streetname + range of numbers (e.g., Mystreet 6-18)
		>> note: a maximum number is not always given (e.g., singletons and "from 6 onwards" cases) 
			-> we generate max numbers by hand (e.g., 6 -> 6-6 and "from 6" -> 6-320 (hypothetical max))
		
	> from street segments extend data to include each single address (e.g., Mystreet 6, Mystreet 8, etc.)
		-> several cases to be considered:
		0. streets without any numbers // not the case in 2017
		 . address named exactly (border cases of the segments: if 6-10a -> 6 and 10a)
		1. beginning and ending of address contain only numbers and no letters (e.g., 1-15)
		 If segments with letters, different cases:
			2. only beginning of the segment contains a letter (e.g., 16a-24)
				3. only ending of the segment contains a letter (e.g., 16-24f)
			If segments contain letter at the beginning and the ending, different cases:
			4. assignment given for exactly one address (same street and same letter), e.g. 16a-16a
			5. assignment given for exactly one street (but with different addresses (letters)), e.g. 17b-17e
			6. first and second mentioned address contain letter and both have different numbers (e.g., 8b-11d)

*/



************************************************************************
**** 1. import, clean, and prepare for merging
************************************************************************
import excel using "$rawdata/election_office/electoral_rolls/KommW2014_Strassenverzeichnis.xlsx", cellrange(A1:G6600) clear // no header row is read, so the columns arrive as A..G

* give all seven sheet columns positional names in one step,
* then discard the three columns (v2, v4, v6) that are not used below
	rename (A B C D E F G) (v1 v2 v3 v4 v5 v6 v7)
	drop v2 v4 v6

/* This is what the first five rows look like (for comparison with 2009, 2013, 2014).

v1	v3	v5	v7
S t a d t b e z i r k   01	Neuturmstr.	Burgstr.	Westenriederstr.
	Odeonspl.	Dienerstr.	6  -  29   fortl.
S t i m m b e z i r k   0101	ab       6   fortl.	Dreifaltigkeitspl.	
	Orlandostr.	Falkenturmstr.	
Wahllokal:	Pfisterstr.	2	
*/

* generate a running number (will be necessary for creating a "page")
	gen num = _n

* position of each row within its 41-row page cycle:
* 1-40 are the content rows of a page, 0 marks the empty row that separates two pages
	gen tmp1 = mod(num,41)

* drop the empty separator rows
* (the former helper flag tmp2 is no longer created: it was meant to be removed by
*  "cap drop tmp2 tmp3", but tmp3 never existed, so the captured drop failed and tmp2 stayed)
	drop if tmp1==0

*generate page variable for each observation
	gen page = (num - tmp1)/41 + 1
	drop num tmp1
	
*summarize to get the maximum (=page number)
	sum page //max 161
	local max = r(max) // number of pages; kept available for any later use

*re-organize data: the sheet holds four text columns (v1 v3 v5 v7) per page.
* Stack them into one string variable "street" so that, page by page, the rows of v1
* come first, followed by the rows of v3, v5 and v7, each in their original row order.
* This is done in memory with a single reshape instead of copying the full data set into
* a frame and writing/erasing one file per page and column.
	gen long row_in_sheet = _n
	rename (v1 v3 v5 v7) (street1 street3 street5 street7)
	reshape long street, i(row_in_sheet) j(col)

* col takes the values 1 3 5 7, so this sort reproduces the page -> column -> row order
	sort page col row_in_sheet
	keep street


// first: generate district and precinct IDs from the header rows
* header rows are recognised on the raw text, i.e. before blanks are stripped from street below
	local hdr_district "S t a d t b e z i r k"
	local hdr_precinct "S t i m m b e z i r k"

	gen stadtbez = subinstr(street,"`hdr_district'","",.) if strpos(street,"`hdr_district'")==1
	lab var stadtbez "city district ID"
	replace stadtbez = subinstr(stadtbez," ","",.)
* IDs that start with a zero lose their zeros (01 becomes 1)
* NOTE(review): this removes every zero in such an ID, not only the leading one - fine for two-digit IDs, confirm
	replace stadtbez = subinstr(stadtbez,"0","",.) if strpos(stadtbez,"0")==1
* NOTE(review): unlike stadtbez, sb keeps the blanks of the raw header text at this point
	gen sb = subinstr(street,"`hdr_precinct'","",.) if strpos(street,"`hdr_precinct'")==1

*clean street: remove all blanks
	replace street = subinstr(street, " ", "", .)

// second: drop polling place addresses and keep only addresses of eligible voters
// polling places are always directly below the precinct
* flag the row that opens a polling place entry
	gen byte polling_row = strpos(street,"Wahllokal:")==1
* extend the flag downwards over the following non-empty rows (stops at the first empty row)
	replace polling_row = 1 if !missing(street) & polling_row[_n-1]==1
* remove the flagged rows and the helper
	drop if polling_row==1
	drop polling_row

* blank out the street on header rows: they name a district or precinct, not a street
	replace street = "" if !(missing(stadtbez) & missing(sb))

* carry district and precinct IDs down to the street rows below each header
	foreach id in stadtbez sb {
		replace `id' = `id'[_n-1] if missing(`id')
	}
	lab var sb "raw precinct id"

* drop rows without a street (header rows and empty rows)
	drop if missing(street)
	
* rows holding a house number or segment start either with a digit or with "ab" (=from);
* copy those rows into hnr_orig_segment
	gen hnr_orig_segment = street if inrange(substr(street, 1, 1), "0", "9") | substr(street, 1, 2) == "ab"




* the number/segment of an address is listed in the row(s) below its street name.
* mark every segment row with "." in the street variable
	replace street = "." if !missing(hnr_orig_segment)
* for each segment row, look up the entry of the row directly above
	gen street_above = street[_n-1] if street=="."
* if the row above is itself a segment row, pass its street name further down
* (several segments can belong to the same street)
	replace street_above = street_above[_n-1] if street_above=="."
* also mark the street-name row that sits directly above a segment row
	replace street = "." if street[_n+1]=="."
* fill all marked rows from the helper: segment rows receive their street name,
* the marked street-name rows receive an empty string
	replace street = street_above if street == "."
* helper no longer needed
	drop street_above
* remove the emptied street-name rows, so only the segment rows remain for streets with segments
	drop if missing(street) // if the address goes over more than one row, the duplicate is dropped here


* correcting small typos: remove every question mark from the street name
	replace street = subinstr(street,"?","",.)

* update typing of (long) street names (checked manually)
* Each line is an exact-match replacement: the right-hand side is the abbreviated name as it
* appears in the roll after all blanks were removed from street, the left-hand side is the
* spelled-out name. Blanks, hyphens and dots in the new names are removed again when
* merge_strasse is built further below.
	replace street="Hildegard-von-Bingen-Anger" if street=="Hild.-v.-Bingen-Ang."
	replace street="Rondell Neuwittelsbach" if street=="Rond.Neuwittelsbach"
	replace street="Franz-Josef-Strauß-Ring" if street=="F.-J.-Strauß-Ring"
	replace street="Hedwig-Dransfeld-Allee" if street=="Hedwig-Dransfeld-A."
	replace street="Karl-Amadeus-Hartmann-Weg" if street=="Karl-A.-Hartmann-Weg"
	replace street="Geschwister-Scholl-Pl." if street=="Geschw.-Scholl-Pl."
	replace street="Walter-Klingenbeck-Weg" if street=="Walter-Klingenbeck-W"
	replace street="Heinrich-Stieglitz-Kehre" if street=="Heinr.-Stiegl.-Kehre"
	replace street="Jean-Paul-Richter-Str." if street=="Jean-Paul-Richter-St"
	replace street="Hermann-von-Sicherer-Str." if street=="Herm.v.SichererStr."
	replace street="Christoph-Rapparini-Bogen" if street=="Christ.-Rapparini-Bg"
	replace street="Werner-Friedmann-Bogen" if street=="Werner-Friedmann-Bg"
	replace street="Friedenheimer Brücke" if street=="FriedenheimerBrü."
	replace street="Johann-Sebastian-Bach-Str." if street=="Johann-Seb-Bach-Str."
	replace street="Maximilian-Wetzger-Str." if street=="Maximilian-Wetzger-S"
	replace street="Nördliche Auffahrtsallee" if street=="Ndl.-Auffahrtsallee"
	replace street="Nördliches Schloßrondell" if street=="Ndl.-Schloßrondell"
	replace street="Rainer-Werner-Fassbinder-Pl." if street=="R.-W.-Fassbinder-Pl."
	replace street="Südliches Schloßrondell" if street=="Suedl.Schloßrondell"
	replace street="Ehrenbreitsteiner Str." if street=="EhrenbreitsteinerSt"
	replace street="Jakob-Hagenbucher-Str." if street=="Jak.-Hagenbucher-Str"
	replace street="Moosacher St.-Martins-Pl." if street=="MoosachSt-Martins-P"
	replace street="Christoph-von-Gluck-Pl." if street=="Christ.-von-Gluck-Pl"
	replace street="Erwin-von-Steinbach-Weg" if street=="Erw.v.-Steinbach-Weg"
	replace street="Martin-Luther-King-Weg" if street=="Mart.Luther-King-Weg"
	replace street="Werner-Seelenbinder-Weg" if street=="W.-Seelenbinder-Weg"
	replace street="Alois-Wolfmüller-Str." if street=="Alois-Wolfmueller-St"
	replace street="Christoph-Probst-Str." if street=="Christ.-Probst-Str."
	replace street="Elisabeth-Selbert-Str." if street=="Elisabeth-Selbert-St"
	replace street="Freimanner Bahnhofstr." if street=="FreimannerBahnhofst"
	replace street="Heinrich-Geißler-Str." if street=="Heinr.-Geißler-Str."
	replace street="Karl-Köglsperger-Str." if street=="Karl-Köglsperger-St"
	replace street="Peter-Paul-Althaus-Str." if street=="Peter-Paul-Althaus-S"
	replace street="Werner-Heisenberg-Allee" if street=="W.-Heisenberg-Allee"
	replace street="Adolf-Oberländer-Weg" if street=="Adolf-Oberländer-We"
	replace street="Anschütz-Kaempfe-Str." if street=="Anschuetz-Kaempfe-S"
	replace street="Bernhard-Lichtenberg-Weg" if street=="Bernh.-Lichtenberg-W"
	replace street="Else-Lasker-Schüler-Str." if street=="E.-Lasker-Schüler-S"
	replace street="Eugen-Kalkschmidt-Weg" if street=="Eug.-Kalkschmidt-Weg"
	replace street="Friedrich-Herschel-Str." if street=="Friedr.-Herschel-Str"
	replace street="Gertrud-von-Le-Fort-Weg" if street=="Gertr.-v-Le-Fort-Weg"
	replace street="Hugo-von-Hofmannsthal-Str." if street=="H.-v.-Hofmannsthal-S"
	replace street="Johannes-Neuhäusler-Str." if street=="Joh.-Neuhäusler-Str"
	replace street="Marie-Luise-Kaschnitz-Str." if street=="Marie-L.-Kaschnitz-S"
	replace street="Wolfgang-Borchert-Weg" if street=="Wolfg.-Borchert-Weg"
	replace street="Johann-Michael-Fischer-Pl." if street=="Joh.-Mich.-Fischer-P"
	replace street="Bürgermeister-Keller-Str." if street=="Bürgerm.-Keller-Str"
	replace street="Elisabeth-Mann-Borgese-Str." if street=="Elis.-Mann-Borgese-S"
	replace street="Elisabeth-zu-Guttenberg-Str." if street=="Elis.-z-Guttenberg-S"
	replace street="Friedrich-Creuzer-Str." if street=="Friedr.-Creuzer-Str"
	replace street="Herzogstandstr. (Gronsdorf)" if street=="Herzogstandstr.-Grdf"
	replace street="Maria-Montessori-Str." if street=="M.-Montessori-Str."
	replace street="Albert-Schweitzer-Str." if street=="Albert-Schweitzer-St"
	replace street="Arnold-Sommerfeld-Str." if street=="Arnold-Sommerfeld-St"
	replace street="Charles-de-Gaulle-Str." if street=="Charles-de-Gaulle-St"
	replace street="Dr.-Walther-von-Miller-Str." if street=="Dr-Walth.-v-Miller-S"
	replace street="Friedrich-Engels-Bogen" if street=="Friedr.-Engels-Bogen"
	replace street="Gerhart-Hauptmann-Ring" if street=="Gerh.-Hauptmann-Ring"
	replace street="Gertrud-Kückelmann-Weg" if street=="Gertr.-Kückelmann-W"
	replace street="Hans-Pössenbacher-Weg" if street=="Hans-Pössenbacher-W"
	replace street="Joseph-Maria-Lutz-Anger" if street=="Jos-Maria-Lutz-Anger"
	replace street="Marieluise-Fleißer-Bogen" if street=="Mariel.-Fleißer-Bog"
	replace street="Maximilian-Kolbe-Allee" if street=="Maximil.-Kolbe-Allee"
	replace street="Peter-Lühr-Str." if street=="Peter-Luehr-Str."
	replace street="Stephanskirchener Str." if street=="StephanskirchenerSt"
	replace street="Heinrich-Kröller-Str." if street=="Heinr.-Kröller-Str."
	replace street="Alpspitzstr. (Pullach)" if street=="Alpspitzstr.-Pullach"
	replace street="Höllriegelskreuther Str." if street=="Hoellriegelskr.Str."
	replace street="Vinzenz-Schüpfer-Str." if street=="Vinzenz-Schüpfer-St"
	replace street="Holzapfelkreuther Str." if street=="HolzapfelkreutherSt"
	replace street="Ludwig-Wörl-Weg" if street=="Ludwig-Woerl-Weg"
	replace street="Annelies-Kupper-Allee" if street=="Annelies-Kupper-Alle"
	replace street="Philippine-Schick-Allee" if street=="Philipp.-Schick-Alle"
	replace street="Richard-Riemerschmid-Allee" if street=="Rich-Riemerschmid-Al"
	replace street="Am Aubinger Wasserturm" if street=="AmAubingerWassert."
	replace street="Friedrichshafener Str." if street=="FriedrichshafenerSt"
	replace street="Josef-Steinbacher-Weg" if street=="Jos.-Steinbacher-Weg"
	replace street="Margarete-Kliemann-Weg" if street=="Marg.-Kliemann-Weg"
	replace street="Walter-Schnackenberg-Weg" if street=="Walt-Schnackenberg-W"
	replace street="Albert-Pfretzschner-Weg" if street=="Albert-Pfretzschn.-W"
	replace street="Ernst-von-Romberg-Str." if street=="Ernst-von-Romberg-St"
	replace street="Georg-Reismüller-Str." if street=="Gg.-Reismüller-Str."
	replace street="Nikolaus-Rüdinger-Str." if street=="Nikol.-Rüdinger-Str"
	replace street="Peter-Stegmüller-Weg" if street=="Peter-Stegmüller-Wg"
	replace street="Auf den Schrederwiesen" if street=="Aufd.Schrederwiesen"
	replace street="Berthold-Litzmann-Str." if street=="Berthold-Litzmann-St"
	replace street="Ignatius-Blenninger-Str." if street=="Ignat.-Blenninger-St"
	replace street="Ludwig-Gramminger-Str." if street=="Ludw.-Gramminger-Str"
	replace street="Paula-Breitenbach-Weg" if street=="Paula-Breitenbach-Wg"
	replace street="Steinröschenstr." if street=="Steinroeschenstr."
	replace street="Wilhelmine-Reichard-Str." if street=="Wilhelm.-Reichard-S."
	replace street="Johannes-Scharrer-Str." if street=="Johannes-Scharrer-St"
	replace street="Johann-Karg-Straße (Haar)" if street=="Joh.-Karg-Str.(Haar)"
	replace street="Roßsteinstraße" if street=="Rosssteinstr."
	replace street="Wolfratshauser Straße (Pul)" if street=="Wolfratsh.Str.(Pul)"
	replace street="Magdalena-Bräu-Weg" if street=="Magdalena-Braeu-Weg"
	replace street="Margarete-Schütte-Lihotzky-Straße" if street=="Mar.-Schuette-Lih.-S"
	replace street="Schneiderhofstraße (Gronsdorf)" if street=="Schneiderhofstr.(GR)"
	replace street="Gottfried-Koelwel-Weg" if street=="Gottfr.-Koelwel-Weg"
	replace street="Georg-Kerschensteiner-Straße" if street=="Georg-Kerschenst.-S."
	replace street="Schnorr-von-Carolsfeld-Straße" if street=="Sch.v-Carolsfeld-Str"
	replace street="Zur Deutschen Einheit" if street=="Z.DeutschenEinheit"
	replace street="Südliche Auffahrtsallee" if street=="Suedl.Auffahrtsallee"
	replace street="Sämannstraße (Gräfelfing)" if street=="Saemannstr.(Graef.)"
	replace street="Am Hartmannshofer Bächl" if street=="AmHartmannsh.Bächl"
* streets that carry a different name in the 2017 data: map the 2014 name to the later one
	replace street="Landaubogen" if street=="Leonhard-Moll-Bogen" // renamed by the city for historical reasons between 2014 and 2017
	replace street="Ilse-Weber-Str." if street=="Paul-Lagarde-Str." // renamed by the city for historical reasons between 2014 and 2017
	replace street="Brunnthaler Weg" if street=="Friedrich-Berber-Weg" // renamed by the city between 2014 and 2017


*Change street names for geocoding
* Generic abbreviation expansion applied to every street name. The statements are plain
* substring replacements (all occurrences), so their ORDER matters: later lines repair
* side effects of earlier ones. Do not reorder without re-checking the resulting names.
	replace street = subinstr(street, "Kard.", "Kardinal", .)     
* "Kard." is handled before the generic "d." rule, which would otherwise hit it
	replace street = subinstr(street, "d.", "der", .)
	replace street = subinstr(street, "Alb.", "Albert", .)     
* "Alb." is handled before the generic "b." rule for the same reason
	replace street = subinstr(street, "b.", "burger", .)     
* "-Bog" also matches names that already end in "-Bogen" and turns them into "Bogenen";
* the "Bogenen" line below repairs that. Note that the hyphen is removed by this rule.
	replace street = subinstr(street, "-Bog", "Bogen", .)     
	replace street = subinstr(street, "B.", "Bogen", .)     
	replace street = subinstr(street, "Br.", "Brücke", .)     
	replace street = subinstr(street, "Bog.", "Bogen", .)     
	replace street = subinstr(street, "Bogenen", "Bogen", .)     
	replace street = subinstr(street, "Wink.", "Winkel", .)     
	replace street = subinstr(street, "A.", "An", .)     
	replace street = subinstr(street, "-S.", "-Str.", .)     
	replace street = subinstr(street, "-Rg.", "Ring", .)     
	replace street = subinstr(street, "-Rg", "Ring", .)     
	replace street = subinstr(street, "v.", "von", .)     
	replace street = subinstr(street, "-v-", "von", .)     
* the following rules fire only when the name ENDS in the abbreviation (see the if condition),
* but the replacement itself is applied to every occurrence inside such a name
* NOTE(review): e.g. a name ending in "W" has all its capital W replaced - presumably checked on the data, confirm
	replace street = subinstr(street, "Wg", "Weg", .)    if  substr(street, strlen(street)-1, 2) =="Wg"
	replace street = subinstr(street, "W.", "Weg", .)    if  substr(street, strlen(street)-1, 2) =="W."
	replace street = subinstr(street, "W", "Weg", .)    if  substr(street, strlen(street), 1) =="W"
	replace street = subinstr(street, "wg", "weg", .)    if  substr(street, strlen(street)-1, 2) =="wg"
	replace street = subinstr(street, "P", "Platz", .)    if  substr(street, strlen(street), 1) =="P"
	replace street = subinstr(street, "St", "Straße", .)    if  substr(street, strlen(street)-1, 2) =="St"

* NOTE(review): by this point the "d." and "A." rules above have already rewritten these two raw
* spellings, so the conditions below appear unable to match any more - verify
	replace street="An der Streicherbreiten" if street=="A.d.Streicherbreiten"
	replace street="An der Hauptfeuerwache" if street=="And.Hauptfeuerwache"

* Further Change street names for geocoding
	replace street = subinstr(street,"St.","Sankt",.)
	replace street = subinstr(street,"Südl.","Südliche",.)
* "Alle" -> "Allee" also lengthens names that already contain "Allee" to "Alleee";
* the next line shortens those back to "Allee"
	replace street = subinstr(street,"Alle","Allee",.)
	replace street = subinstr(street,"Alleee","Allee",.)
	replace street = subinstr(street,"pl.","platz",.)
	replace street = subinstr(street,"Pl.","Platz",.)
* spell out "str." / "Str." and then write the sharp s as "ss"
	replace street = subinstr(street,"str.","straße",.)
	replace street = subinstr(street,"Str.","Straße",.)
	replace street = subinstr(street,"straße","strasse",.)
	replace street = subinstr(street,"Straße","Strasse",.)

* complete names that end in a bare "str" or "pl" (any letter case) without a trailing dot
	replace street = street + "asse" if lower(substr(street, strlen(street)-2, 3)) =="str"
	replace street = street + "atz" if lower(substr(street, strlen(street)-1, 2)) =="pl"

* undo the "Alle" -> "Allee" rule for two street names in which "Alle" is not an abbreviation
	replace street="Allensteiner Strasse" if street=="AlleensteinerStrasse"
	replace street="Allescherstrasse" if street=="Alleescherstrasse"


* merge_strasse := harmonized street name used as merge key
* (titles spelled out, lower case, no blanks or punctuation, sharp s and umlauts transliterated)
	rename street merge_strasse
	lab var merge_strasse "harmonized street id for merging"

* spell out academic titles; this has to happen while the dots are still present
	replace merge_strasse = subinstr(subinstr(merge_strasse,"Dr.","Doktor",.),"Prof.","Professor",.)

* strip blanks and convert to lower case
* NOTE(review): lower() only converts ASCII letters, so a capital umlaut would stay as it is
* and would not be transliterated below - confirm that no street name starts with one
	replace merge_strasse = lower(subinstr(merge_strasse," ","",.))

* remove hyphens, dots and apostrophes
	replace merge_strasse = subinstr(subinstr(subinstr(merge_strasse,"-","",.),".","",.),"'","",.)

* transliterate sharp s and lower-case umlauts
	local from_chars "ß ä ö ü"
	local to_chars "ss ae oe ue"
	forvalues i = 1/4 {
		local src : word `i' of `from_chars'
		local dst : word `i' of `to_chars'
		replace merge_strasse = subinstr(merge_strasse,"`src'","`dst'",.)
	}
	
	

* different from later years: segments not only defined by "-", but also by "from" ("ab"), "even" ("gerade"), "odd" ("unger."), "consecutive" ("fortl.")
* each variable holds the position of the keyword inside hnr_orig_segment (0 = keyword not present)
	gen ab = strpos(hnr_orig_segment, "ab")
	gen even = strpos(hnr_orig_segment, "gerade")
	gen odd = strpos(hnr_orig_segment, "unger.")
	gen fort = strpos(hnr_orig_segment, "fortl.")
	gen minus = strpos(hnr_orig_segment, "-")

* "ab" may only occur at the very beginning of an entry
	assert ab==1 | ab==0
* the qualifiers consecutive / odd / even are mutually exclusive
	assert even==0 & odd==0 if fort>0
	assert even==0 & fort==0 if odd>0
	assert odd==0 & fort==0 if even>0
* entries that are neither a "from" case nor a range (single numbers, or no number at all)
* must not carry any qualifier.
* Written as three explicit comparisons: a chained "fort==even==odd==0" is evaluated from
* left to right as ((fort==even)==odd)==0 and therefore let entries with only odd>0 pass.
	assert fort==0 & even==0 & odd==0 if ab==0 & minus==0

** Split each entry (e.g. 2-4gerade or ab26fortl.) into its components
*	hnr_min / hnr_max := first and last house number of the segment
*	buchstaben_min / buchstaben_max := (optional) letter attached to the respective number, e.g., 44A
*	when a whole street is assigned to a precinct, hnr_orig_segment is empty

* raw text of the starting number
	* "from" case: everything after the leading "ab"
	gen hnr_orig_segment1 = substr(hnr_orig_segment, ab+2, .) if ab==1
	* all other cases: the part in front of the "-", or the whole entry if there is no "-"
	* (differs from later years: a single address can be listed as 6 instead of 6-6)
	replace hnr_orig_segment1 = cond(minus==0, hnr_orig_segment, substr(hnr_orig_segment, 1, minus-1)) if ab==0
* check that every entry produced a starting part
	assert missing(hnr_orig_segment)==missing(hnr_orig_segment1)

* raw text of the ending number: the part behind the "-" (not available for "from" cases)
	gen hnr_orig_segment2 = substr(hnr_orig_segment, minus+1, .) if ab==0
* check that every entry except the "from" cases produced an ending part
	assert missing(hnr_orig_segment)==missing(hnr_orig_segment2) | ab==1
	drop if hnr_orig_segment2 == "0" // drop if 0 is the number (does not exist)

	* guard against typical typos
	assert substr(hnr_orig_segment2,1,1)!="0"
	assert strpos(hnr_orig_segment1, ",")==0

* not every entry states a maximum number
* single addresses: the maximum equals the entry itself
	assert hnr_orig_segment==hnr_orig_segment1 if ab==0 & minus==0
	replace hnr_orig_segment2 = hnr_orig_segment if ab==0 & minus==0
* open-ended entries: use a hypothetical maximum (320 for even, 321 for odd and consecutive)
	assert  missing(hnr_orig_segment2) if missing(hnr_orig_segment)
	replace hnr_orig_segment2 = "320" if missing(hnr_orig_segment2) & even>0
	replace hnr_orig_segment2 = "321" if missing(hnr_orig_segment2) & (odd>0 | fort>0)

* reduce both parts to number (+ letter)
* first remove the known qualifier strings
	foreach c in "gerade" "unger." "fortl." {
		forvalues k = 1/2 {
			replace hnr_orig_segment`k' = subinstr(hnr_orig_segment`k',"`c'","",.)
		}
	}
* then take the first run of digits as the number
	local part_min hnr_orig_segment1
	local part_max hnr_orig_segment2
	foreach b in min max {
		gen hnr_`b' = real(ustrregexs(1)) if ustrregexm(`part_`b'',"([0-9]+)")
	}
* temporarily store the numbers as text so that they can be cut out of the raw parts
	tostring hnr_min hnr_max, replace

* whatever remains after removing the number is the address letter (stored in lower case)
	foreach b in min max {
		gen buchstaben_`b' = lower(subinstr(`part_`b'', hnr_`b', "", .))
	}
	destring hnr_min hnr_max, replace


************************************************************************
**** 2. start generating addresses// from here very similar to the other elections
************************************************************************
//go through different cases
frame copy default tmp, replace
frame tmp {
*case0: streets without numbers, i.e. the whole street belongs to one precinct
	keep if missing(hnr_orig_segment)
	keep stadtbez sb merge_strasse

* one copy of each street per possible house number
* (321 is chosen as no street contains a higher number (checked))
	expand 321
	bys stadtbez merge_strasse: assert _N==321
	bys stadtbez merge_strasse: gen hnr = _n

* 27 copies of each house number: positions 1-26 become the letters a-z,
* position 27 is the address without a letter
	expand 27
	bys stadtbez merge_strasse hnr: gen int letter_idx = _n
	gen str2 buchstabe = cond(letter_idx==27, "", char(96 + letter_idx))
	drop letter_idx

*save tempfile (appended below)
	tempfile full_buchstabe 
	save `full_buchstabe'
}



* whole-street entries were handled above; continue with the entries that carry a segment
	keep if !missing(hnr_orig_segment)
	assert !missing(hnr_min) & !missing(hnr_max)

* make sure both bounds are numeric
	destring hnr_min hnr_max, replace

* open-ended ("ab") segments that start above the default maximum: upper bound set by hand
	replace hnr_max = 394 if merge_strasse=="balanstrasse" & hnr_orig_segment=="ab384fortl."
	replace hnr_max = 570 if merge_strasse=="dachauerstrasse" & hnr_orig_segment=="ab542gerade"
	replace hnr_max = 667 if merge_strasse=="dachauerstrasse" & hnr_orig_segment=="ab641unger."
	replace hnr_max = 443 if merge_strasse=="feldmochingerstrasse" & hnr_orig_segment=="ab400fortl."
	replace hnr_max = 337 if merge_strasse=="fuerstenriederstrasse" & hnr_orig_segment=="ab333unger."
	replace hnr_max = 386 if merge_strasse=="hoeglwoertherstrasse" & hnr_orig_segment=="ab340gerade"
	replace hnr_max = 391 if merge_strasse=="hoeglwoertherstrasse" & hnr_orig_segment=="ab345unger."
	replace hnr_max = 529 if merge_strasse=="landsbergerstrasse" & hnr_orig_segment=="ab515unger."
	replace hnr_max = 334 if merge_strasse=="lerchenauerstrasse" & hnr_orig_segment=="ab332gerade"
	replace hnr_max = 345 if merge_strasse=="lerchenauerstrasse" & hnr_orig_segment=="ab335unger."
	replace hnr_max = 425 if merge_strasse=="riemerstrasse" & hnr_orig_segment=="ab365unger."

* every segment must now run from a lower to a higher (or equal) number
	assert hnr_min<=hnr_max

frame copy default tmp, replace
frame tmp {
*case.x: the two boundary addresses of a segment are named exactly (6-10a -> 6 and 10a)
* two copies per segment: copy 1 becomes the lower, copy 2 the upper boundary
	expand 2
	local seg_id stadtbez sb merge_strasse hnr_orig_segment
	bys `seg_id': gen endpoint = _n // 1 or 2
	bys `seg_id': assert endpoint<=2

* number and letter of the respective boundary
	gen hnr = cond(endpoint==1, hnr_min, hnr_max)
	gen buchstabe = cond(endpoint==1, buchstaben_min, buchstaben_max)

* keep relevant variables
	keep merge_strasse hnr buchstabe sb stadtbez

* lower and upper boundary can coincide: keep each address only once
	bys merge_strasse hnr buchstabe sb stadtbez: drop if _n>1

*save tempfile (appended below)
	tempfile exact 
	save `exact'
}
	
	
// turn each segment into one row per house number (letters are added in the cases below)
* one copy of the segment for every number between hnr_min and hnr_max
	expand hnr_max - hnr_min + 1

* number the copies consecutively, starting at hnr_min
* NOTE(review): sb is not part of the by-list; this presumes that a segment text occurs
* only once per street within a district - confirm
	bys stadtbez merge_strasse hnr_orig_segment: gen hnr = hnr_min + _n - 1

* odd-only segments lose the even numbers, even-only segments lose the odd numbers
	drop if (odd>0 & mod(hnr, 2) == 0) | (even>0 & mod(hnr, 2) == 1)

* the raw segment text is not needed any more
	drop hnr_orig_segment


frame copy default tmp, replace
frame tmp {
*case1: neither boundary of the segment carries a letter
	keep if missing(buchstaben_min) & missing(buchstaben_max)

* every house number of the segment is listed with all letters a-z and once without a letter
	expand 27
	local cell stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max
	bys `cell': assert(_N==27)
	bys `cell': gen int letter_idx = _n

* positions 1-26 become the letters a-z, position 27 the address without a letter
	gen str2 buchstabe = cond(letter_idx==27, "", char(96 + letter_idx))
	drop letter_idx

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max 

*save tempfile (appended below)
	tempfile no_buchstabe 
	save `no_buchstabe'
}


* case 1 is done: remove these segments from the working data
	drop if missing(buchstaben_min) & missing(buchstaben_max)

frame copy default tmp, replace
frame tmp {
*case 2: only the lower boundary of the segment carries a letter (buchstaben_min), e.g. 16a-24
	keep if !missing(buchstaben_min) & missing(buchstaben_max)

* position of that letter in the alphabet (a=1 ... z=26); left missing for anything
* that is not exactly one of the letters a-z
	local alphabet "abcdefghijklmnopqrstuvwxyz"
	gen byte tmp_buch = strpos("`alphabet'", buchstaben_min) if strlen(buchstaben_min)==1 & strpos("`alphabet'", buchstaben_min)>0

* 27 copies of every house number: position 0 is the address without a letter, 1-26 are a-z
	expand 27
	local cell stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max
	bys `cell': assert(_N==27)
	bys `cell': gen int letter_idx = _n - 1

* at the first house number of the segment, everything in front of the named letter is outside the segment
	drop if letter_idx < tmp_buch & hnr==hnr_min

* translate the positions back into letters
	gen str2 buchstabe = cond(letter_idx==0, "", char(96 + letter_idx))
	drop letter_idx

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch
	
*save tempfile (appended below)
	tempfile buchstabe_min
	save `buchstabe_min'
}


frame copy default tmp, replace
frame tmp {
*case 3: only the ending of the segment contains a letter (buchstaben_max), e.g., 16-24f
	keep if missing(buchstaben_min) & !missing(buchstaben_max)

* position of the closing letter in the alphabet (a=1, ..., z=26)
* tmp_buch stays missing for anything that is not a single lowercase letter a-z,
* in which case the drop below removes nothing (a missing value compares as largest)
	local alphabet "abcdefghijklmnopqrstuvwxyz"
	gen byte tmp_buch = strpos("`alphabet'", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch = . if tmp_buch==0

* create 27 copies of every row: index 0 = no letter, index 1-26 = letters a-z
* the assert stops the run if a by-group does not consist of exactly one source row
	tempvar idx
	expand 27
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert _N==27
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte `idx' = _n-1

* at the last house number of the segment, letters AFTER the mentioned letter are dropped
	drop if `idx' > tmp_buch & hnr==hnr_max

* translate the index back into the letter suffix (index 0 -> empty string)
	gen str2 buchstabe = ""
	replace buchstabe = substr("`alphabet'", `idx', 1) if `idx'>=1

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch `idx'

*save tempfile (appended below)
	tempfile buchstabe_max
	save `buchstabe_max'
}


* from here on only segments with a letter at BOTH ends remain in memory
* (the cases dealt with so far are removed)
	drop if missing(buchstaben_min) | missing(buchstaben_max)


frame copy default tmp, replace
frame tmp {
*case 4: assignment given for exactly one address (same house number and same letter), e.g., 16a-16a
	drop if hnr_min!=hnr_max | buchstaben_min!=buchstaben_max

* the address letter is simply the (identical) letter of the segment borders
	gen buchstabe = buchstaben_min

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max

*save tempfile (appended below)
	tempfile one_address
	save `one_address'
}

frame copy default tmp, replace
frame tmp {
*case 5: assignment given for exactly one house number but a range of letters, e.g., 17b-17e
	keep if hnr_min==hnr_max & buchstaben_min!=buchstaben_max

* positions of the first (tmp_buch) and second (tmp_buch2) letter in the alphabet (a=1, ..., z=26)
* both stay missing for anything that is not a single lowercase letter a-z
* note: a missing tmp_buch removes every copy below (any number compares smaller than missing),
*       a missing tmp_buch2 removes none
	local alphabet "abcdefghijklmnopqrstuvwxyz"
	gen byte tmp_buch = strpos("`alphabet'", buchstaben_min) if strlen(buchstaben_min)==1
	replace tmp_buch = . if tmp_buch==0
	gen byte tmp_buch2 = strpos("`alphabet'", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch2 = . if tmp_buch2==0

* create 26 copies of every row: index 1-26 = letters a-z (no copy without letter in this case)
* the assert stops the run if a by-group does not consist of exactly one source row
	tempvar idx
	expand 26
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert _N==26
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte `idx' = _n

* keep only the letters from the first to the second mentioned letter
	drop if `idx' < tmp_buch | `idx' > tmp_buch2

* translate the index back into the letter suffix
	gen str2 buchstabe = substr("`alphabet'", `idx', 1)

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch tmp_buch2 `idx'

*save tempfile (appended below)
	tempfile one_number
	save `one_number'
}

* drop cases dealt with so far
	drop if hnr_min==hnr_max


*last remaining case (case6): first and second mentioned address contain a letter and both have different numbers (e.g., 8b-11d)
* positions of the first (tmp_buch) and second (tmp_buch2) letter in the alphabet (a=1, ..., z=26)
* both stay missing for anything that is not a single lowercase letter a-z
* note: a missing tmp_buch removes every copy at hnr_min below (any number compares smaller than missing),
*       a missing tmp_buch2 removes none at hnr_max
	local alphabet "abcdefghijklmnopqrstuvwxyz"
	gen byte tmp_buch = strpos("`alphabet'", buchstaben_min) if strlen(buchstaben_min)==1
	replace tmp_buch = . if tmp_buch==0
	gen byte tmp_buch2 = strpos("`alphabet'", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch2 = . if tmp_buch2==0

* create 27 copies of every row: index 0 = no letter, index 1-26 = letters a-z
* the assert stops the run if a by-group does not consist of exactly one source row
	tempvar idx
	expand 27
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert _N==27
	bysort stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte `idx' = _n-1

* first house number of the segment: drop everything BEFORE the first mentioned letter
	drop if `idx' < tmp_buch & hnr_min==hnr
* last house number of the segment: drop everything AFTER the second mentioned letter
	drop if `idx' > tmp_buch2 & hnr_max==hnr

* translate the index back into the letter suffix (index 0 -> empty string)
	gen str2 buchstabe = ""
	replace buchstabe = substr("`alphabet'", `idx', 1) if `idx'>=1

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch tmp_buch2 `idx'
  
*stack the address sets saved in the tempfiles onto the data in memory
	foreach part in full_buchstabe no_buchstabe buchstabe_min buchstabe_max one_address one_number {
		append using ``part''
	}

*exact cases (e.g., 4a in ab4a or 6a and 8b in 6a-8b):
*remove every expanded address that is also listed in `exact' (only unmatched master rows are kept);
*they might occur a second time with a different precinct due to partly imprecise electoral rolls
*the assert stops the run if an `exact' row has no counterpart in the data in memory
* NOTE(review): sb is part of the merge key, so only copies with the SAME precinct are removed here;
*               a copy under a different precinct survives the merge -- confirm this is intended
	merge m:1 stadtbez sb merge_strasse hnr buchstabe using `exact', assert(master match) keep(master) nogenerate

*add the exact cases back, where the correct precinct is known for sure
	append using `exact'


*build the full address number: house number followed by the letter suffix
	tostring hnr, replace
	gen nummer = hnr + buchstabe
	label variable nummer "address number"

* district and precinct IDs as numeric variables
	destring stadtbez sb, replace

* Handle duplicate addresses:
	* step 1) same address listed several times within one precinct (sb): keep a single copy
	bysort stadtbez merge_strasse nummer sb: keep if _n==1
	* step 2) same address still listed more than once within a district, i.e. in several precincts:
	*         drop ALL copies b/c definitive assignment to a precinct is not possible
	bysort stadtbez merge_strasse nummer: drop if _N>1

* keep only necessary variables
	keep stadtbez sb merge_strasse nummer

*save
	save "$tmp/strassen_kow14_voll_buchstabe.dta", replace